from lxml import etree
import requests


# Demo script: fetch the Douban Top 250 page and load HTML into lxml etree
# objects, both from a local file and from the downloaded source.
url = 'https://movie.douban.com/top250'
# Browser-like User-Agent header sent along with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
}
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of silently parsing
# an error page as if it were the movie list.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
response = page.text
# 1. Load the source of a local HTML document into an etree object.
#    etree.parse() uses a strict XML parser unless told otherwise, so an
#    HTML parser is passed explicitly to tolerate non-well-formed markup.
tree = etree.parse('test.html', parser=etree.HTMLParser())
# 2. Load source data fetched from the internet into an etree object.
#    This rebinds `tree`, replacing the local-file tree from step 1.
tree = etree.HTML(response)